* Compress the SAS data sets created by this program;
options compress=YES;
* NOTE(review): the library path is blank in this copy - presumably it has to
  be set to the project data directory before the program is run. Confirm;
libname manasi ''; 

* SIBLING SSI APPLICATIONS;

* Reduce the child application file to HUN (the child-level key used in the
  rest of this program) and the application date, renaming the date to
  APPDATE as it is read in;
data manasi.kid_apps_combined;
	set manasi.kids_allapp (keep=hun appl_jd rename=(appl_jd=appdate));
run;

* De-duplicate on HUN and application date. With no OUT= the data set is
  sorted in place, and the discarded duplicate rows go to the DUPOUT= data set;
proc sort data=manasi.kid_apps_combined
	nodupkey
	dupout=manasi.kid_apps_combined_dups;
	by hun appdate;
run;

* RESHAPE WIDE TO GET ONE OBSERVATION PER CHILD WITH ALL APPLICATIONS FOR THAT
  CHILD AND THEIR DATES;

* Order by child and application date so that applications can be numbered in
  date order within child (sorted in place). Asterisk-style comments must not
  contain unmatched quotation marks, so possessives are avoided here;
proc sort data=manasi.kid_apps_combined;
	by hun appdate;
run;

* Number the applications within each child: 1, 2, 3, ...;
data manasi.kid_apps_combined_wide;
	set manasi.kid_apps_combined;
	by hun;

	* Restart the counter at each new child, then count this application.
	  The sum statement retains app_id across observations;
	if first.hun then app_id = 0;
	app_id + 1;
run;

* Count applications per child: the largest app_id within a HUN is the number
  of applications for that child. PROC SUMMARY produces no printed output by
  default, only the OUT= data set;
proc summary data=manasi.kid_apps_combined_wide;
	by hun;
	var app_id;
	output out=manasi.kid_apps_combined_appid max(app_id)=max_appid;
run;

* Print the distribution of the number of applications per child;
proc means data=manasi.kid_apps_combined_appid
	mean median min max p75 p90 p95 p99;
	var max_appid;
run;

* The 99th percentile was found to be 5 applications per child. Drop any
  application numbered above 7 so that the wide file needs only seven date
  columns;
data manasi.kid_apps_combined_wide;
	set manasi.kid_apps_combined_wide;
	if app_id > 7 then delete;
run;

* Collapse to HUN level: one output record per child, with the application
  dates of that child spread across appdate1-appdate7 (slot number = app_id);
data manasi.kid_apps_combined_wide;
	set manasi.kid_apps_combined_wide;
	by hun;

	array aappdate(1:7) appdate1-appdate7;
	retain appdate1-appdate7;
	keep hun appdate1-appdate7;

	* Clear every slot when a new child starts so dates do not carry over from
	  the previous child;
	if first.hun then call missing(of aappdate(*));

	* app_id must be between 1 and 7 here, otherwise the array subscript is out
	  of range;
	aappdate(app_id) = appdate;

	* Write the accumulated row once the last application has been placed;
	if last.hun then output;
run;

* Display all seven application dates with the DATE9 format;
data manasi.kid_apps_combined_wide;
	set manasi.kid_apps_combined_wide;
	format appdate1-appdate7 date9.;
run;

* CREATE A FILE OF APPLICATIONS FOR EACH KID-PARENT PAIR;

* USE HUN, PAN AND SELECTED FIELDS (TOA, MFT, SEX, BIRTH YEAR) FROM THE PARENT
  SSR RECORDS (approved and denied);
data manasi.parents_comb_hunpan;
	set manasi.parents_allapp(keep=hun pan toa mft birth_jd sex);

	* Drop records with no usable PAN: the two all-zero placeholders, blanks,
	  and any value that does not read as a number. The earlier version of this
	  test compared PAN with a numeric missing value, which made SAS convert PAN
	  to numeric implicitly. The INPUT call performs that conversion explicitly,
	  and the ?? modifier stops invalid values from writing notes to the log or
	  setting the error flag. The same records are deleted as before.
	  NOTE(review): assumes PAN is a character variable of at most 32
	  characters - confirm, and confirm that dropping non-numeric PANs is
	  intended rather than a side effect of the old comparison;
	if pan='*00000000' or pan='000000000' or pan=''
		or input(pan, ?? 32.)=. then delete;

	* Keep only the year of birth, not the full birth date;
	dobyy=year(birth_jd);
	drop birth_jd;
run;

* Keep one record per HUN-PAN pair, sorted by HUN then PAN (in place). When a
  pair occurs more than once NODUPKEY keeps the first record it encounters;
proc sort data=manasi.parents_comb_hunpan nodupkey;
	by hun pan;
run;

* Sort the wide child application file by HUN (in place) so it can be merged
  by HUN;
proc sort data=manasi.kid_apps_combined_wide;
	by hun;
run;

* Attach the application dates of each child to every parent record with the
  same HUN. Parent variables get a par_ prefix, and FROMPAR and FROMKIDS
  record which input data set contributed to each output row;
data manasi.parents_comb_wkidapps;
	merge
		manasi.parents_comb_hunpan
			(in=in_parents
			 rename=(pan=par_ssn toa=par_toa mft=par_mft dobyy=par_dobyy sex=par_sex))
		manasi.kid_apps_combined_wide
			(in=in_kids
			 keep=hun appdate:);
	by hun;

	* IN= variables are temporary, so copy them into permanent variables;
	frompar  = in_parents;
	fromkids = in_kids;
run;

* Cross-tabulate the two merge indicators to see how many rows came from the
  parent file, the child file, or both;
proc tabulate data=manasi.parents_comb_wkidapps;
	class frompar;
	class fromkids;
	table frompar, fromkids;
run;

* Order by parent SSN and then HUN (in place) so that children of the same
  parent sit next to each other;
proc sort data=manasi.parents_comb_wkidapps;
	by par_ssn hun;
run;
